// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// REQUIRES: powerpc-registered-target
// RUN: %clang_cc1 -target-feature +altivec -target-feature +vsx \
// RUN:   -triple powerpc64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -target-feature +altivec -target-feature +vsx \
// RUN:   -target-feature +power8-vector -triple powerpc64le-unknown-unknown \
// RUN:   -emit-llvm %s -o - | FileCheck %s -check-prefixes=CHECK,CHECK-P8
#include <altivec.h>

// CHECK-LABEL: @test1(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[__VEC_ADDR_I:%.*]] = alloca <8 x i16>, align 16
// CHECK-NEXT:    [[__OFFSET_ADDR_I1:%.*]] = alloca i64, align 8
// CHECK-NEXT:    [[__PTR_ADDR_I2:%.*]] = alloca i16*, align 8
// CHECK-NEXT:    [[__ADDR_I3:%.*]] = alloca i8*, align 8
// CHECK-NEXT:    [[__OFFSET_ADDR_I:%.*]] = alloca i64, align 8
// CHECK-NEXT:    [[__PTR_ADDR_I:%.*]] = alloca i16*, align 8
// CHECK-NEXT:    [[__ADDR_I:%.*]] = alloca i8*, align 8
// CHECK-NEXT:    [[C_ADDR:%.*]] = alloca <8 x i16>*, align 8
// CHECK-NEXT:    [[ST_ADDR:%.*]] = alloca i16*, align 8
// CHECK-NEXT:    [[LD_ADDR:%.*]] = alloca i16*, align 8
// CHECK-NEXT:    store <8 x i16>* [[C:%.*]], <8 x i16>** [[C_ADDR]], align 8
// CHECK-NEXT:    store i16* [[ST:%.*]], i16** [[ST_ADDR]], align 8
// CHECK-NEXT:    store i16* [[LD:%.*]], i16** [[LD_ADDR]], align 8
// CHECK-NEXT:    [[TMP0:%.*]] = load i16*, i16** [[LD_ADDR]], align 8
// CHECK-NEXT:    store i64 3, i64* [[__OFFSET_ADDR_I]], align 8
// CHECK-NEXT:    store i16* [[TMP0]], i16** [[__PTR_ADDR_I]], align 8
// CHECK-NEXT:    [[TMP1:%.*]] = load i16*, i16** [[__PTR_ADDR_I]], align 8
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16* [[TMP1]] to i8*
// CHECK-NEXT:    [[TMP3:%.*]] = load i64, i64* [[__OFFSET_ADDR_I]], align 8
// CHECK-NEXT:    [[ADD_PTR_I:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i64 [[TMP3]]
// CHECK-NEXT:    store i8* [[ADD_PTR_I]], i8** [[__ADDR_I]], align 8
// CHECK-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[__ADDR_I]], align 8
// CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to <8 x i16>*
// CHECK-NEXT:    [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[TMP5]], align 1
// CHECK-NEXT:    [[TMP7:%.*]] = load <8 x i16>*, <8 x i16>** [[C_ADDR]], align 8
// CHECK-NEXT:    store <8 x i16> [[TMP6]], <8 x i16>* [[TMP7]], align 16
// CHECK-NEXT:    [[TMP8:%.*]] = load <8 x i16>*, <8 x i16>** [[C_ADDR]], align 8
// CHECK-NEXT:    [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[TMP8]], align 16
// CHECK-NEXT:    [[TMP10:%.*]] = load i16*, i16** [[ST_ADDR]], align 8
// CHECK-NEXT:    store <8 x i16> [[TMP9]], <8 x i16>* [[__VEC_ADDR_I]], align 16
// CHECK-NEXT:    store i64 7, i64* [[__OFFSET_ADDR_I1]], align 8
// CHECK-NEXT:    store i16* [[TMP10]], i16** [[__PTR_ADDR_I2]], align 8
// CHECK-NEXT:    [[TMP11:%.*]] = load i16*, i16** [[__PTR_ADDR_I2]], align 8
// CHECK-NEXT:    [[TMP12:%.*]] = bitcast i16* [[TMP11]] to i8*
// CHECK-NEXT:    [[TMP13:%.*]] = load i64, i64* [[__OFFSET_ADDR_I1]], align 8
// CHECK-NEXT:    [[ADD_PTR_I4:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 [[TMP13]]
// CHECK-NEXT:    store i8* [[ADD_PTR_I4]], i8** [[__ADDR_I3]], align 8
// CHECK-NEXT:    [[TMP14:%.*]] = load <8 x i16>, <8 x i16>* [[__VEC_ADDR_I]], align 16
// CHECK-NEXT:    [[TMP15:%.*]] = load i8*, i8** [[__ADDR_I3]], align 8
// CHECK-NEXT:    [[TMP16:%.*]] = bitcast i8* [[TMP15]] to <8 x i16>*
// CHECK-NEXT:    store <8 x i16> [[TMP14]], <8 x i16>* [[TMP16]], align 1
// CHECK-NEXT:    ret void
//
// vec_xl / vec_xst round trip for signed short elements using a long long
// offset. Loads a vector from ld at offset 3 into *c, then stores *c to st at
// offset 7. The expected IR above shows each offset applied as a byte offset
// (i8 getelementptr) and the <8 x i16> load/store emitted with align 1.
void test1(vector signed short *c, signed short *st, const signed short *ld) {
    *c = vec_xl(3ll, ld);
    vec_xst(*c, 7ll, st);
}

// CHECK-LABEL: @test2(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[__VEC_ADDR_I:%.*]] = alloca <8 x i16>, align 16
// CHECK-NEXT:    [[__OFFSET_ADDR_I1:%.*]] = alloca i64, align 8
// CHECK-NEXT:    [[__PTR_ADDR_I2:%.*]] = alloca i16*, align 8
// CHECK-NEXT:    [[__ADDR_I3:%.*]] = alloca i8*, align 8
// CHECK-NEXT:    [[__OFFSET_ADDR_I:%.*]] = alloca i64, align 8
// CHECK-NEXT:    [[__PTR_ADDR_I:%.*]] = alloca i16*, align 8
// CHECK-NEXT:    [[__ADDR_I:%.*]] = alloca i8*, align 8
// CHECK-NEXT:    [[C_ADDR:%.*]] = alloca <8 x i16>*, align 8
// CHECK-NEXT:    [[ST_ADDR:%.*]] = alloca i16*, align 8
// CHECK-NEXT:    [[LD_ADDR:%.*]] = alloca i16*, align 8
// CHECK-NEXT:    store <8 x i16>* [[C:%.*]], <8 x i16>** [[C_ADDR]], align 8
// CHECK-NEXT:    store i16* [[ST:%.*]], i16** [[ST_ADDR]], align 8
// CHECK-NEXT:    store i16* [[LD:%.*]], i16** [[LD_ADDR]], align 8
// CHECK-NEXT:    [[TMP0:%.*]] = load i16*, i16** [[LD_ADDR]], align 8
// CHECK-NEXT:    store i64 3, i64* [[__OFFSET_ADDR_I]], align 8
// CHECK-NEXT:    store i16* [[TMP0]], i16** [[__PTR_ADDR_I]], align 8
// CHECK-NEXT:    [[TMP1:%.*]] = load i16*, i16** [[__PTR_ADDR_I]], align 8
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16* [[TMP1]] to i8*
// CHECK-NEXT:    [[TMP3:%.*]] = load i64, i64* [[__OFFSET_ADDR_I]], align 8
// CHECK-NEXT:    [[ADD_PTR_I:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i64 [[TMP3]]
// CHECK-NEXT:    store i8* [[ADD_PTR_I]], i8** [[__ADDR_I]], align 8
// CHECK-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[__ADDR_I]], align 8
// CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to <8 x i16>*
// CHECK-NEXT:    [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[TMP5]], align 1
// CHECK-NEXT:    [[TMP7:%.*]] = load <8 x i16>*, <8 x i16>** [[C_ADDR]], align 8
// CHECK-NEXT:    store <8 x i16> [[TMP6]], <8 x i16>* [[TMP7]], align 16
// CHECK-NEXT:    [[TMP8:%.*]] = load <8 x i16>*, <8 x i16>** [[C_ADDR]], align 8
// CHECK-NEXT:    [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[TMP8]], align 16
// CHECK-NEXT:    [[TMP10:%.*]] = load i16*, i16** [[ST_ADDR]], align 8
// CHECK-NEXT:    store <8 x i16> [[TMP9]], <8 x i16>* [[__VEC_ADDR_I]], align 16
// CHECK-NEXT:    store i64 7, i64* [[__OFFSET_ADDR_I1]], align 8
// CHECK-NEXT:    store i16* [[TMP10]], i16** [[__PTR_ADDR_I2]], align 8
// CHECK-NEXT:    [[TMP11:%.*]] = load i16*, i16** [[__PTR_ADDR_I2]], align 8
// CHECK-NEXT:    [[TMP12:%.*]] = bitcast i16* [[TMP11]] to i8*
// CHECK-NEXT:    [[TMP13:%.*]] = load i64, i64* [[__OFFSET_ADDR_I1]], align 8
// CHECK-NEXT:    [[ADD_PTR_I4:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 [[TMP13]]
// CHECK-NEXT:    store i8* [[ADD_PTR_I4]], i8** [[__ADDR_I3]], align 8
// CHECK-NEXT:    [[TMP14:%.*]] = load <8 x i16>, <8 x i16>* [[__VEC_ADDR_I]], align 16
// CHECK-NEXT:    [[TMP15:%.*]] = load i8*, i8** [[__ADDR_I3]], align 8
// CHECK-NEXT:    [[TMP16:%.*]] = bitcast i8* [[TMP15]] to <8 x i16>*
// CHECK-NEXT:    store <8 x i16> [[TMP14]], <8 x i16>* [[TMP16]], align 1
// CHECK-NEXT:    ret void
//
// vec_xl / vec_xst round trip for unsigned short elements using a long long
// offset. Loads a vector from ld at offset 3 into *c, then stores *c to st at
// offset 7. The expected IR above is the same <8 x i16> sequence as the
// signed short case: byte-offset getelementptr, vector access with align 1.
void test2(vector unsigned short *c, unsigned short *st,
           const unsigned short *ld) {
    *c = vec_xl(3ll, ld);
    vec_xst(*c, 7ll, st);
}

// CHECK-LABEL: @test3(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[__VEC_ADDR_I:%.*]] = alloca <4 x i32>, align 16
// CHECK-NEXT:    [[__OFFSET_ADDR_I1:%.*]] = alloca i64, align 8
// CHECK-NEXT:    [[__PTR_ADDR_I2:%.*]] = alloca i32*, align 8
// CHECK-NEXT:    [[__ADDR_I3:%.*]] = alloca i8*, align 8
// CHECK-NEXT:    [[__OFFSET_ADDR_I:%.*]] = alloca i64, align 8
// CHECK-NEXT:    [[__PTR_ADDR_I:%.*]] = alloca i32*, align 8
// CHECK-NEXT:    [[__ADDR_I:%.*]] = alloca i8*, align 8
// CHECK-NEXT:    [[C_ADDR:%.*]] = alloca <4 x i32>*, align 8
// CHECK-NEXT:    [[ST_ADDR:%.*]] = alloca i32*, align 8
// CHECK-NEXT:    [[LD_ADDR:%.*]] = alloca i32*, align 8
// CHECK-NEXT:    store <4 x i32>* [[C:%.*]], <4 x i32>** [[C_ADDR]], align 8
// CHECK-NEXT:    store i32* [[ST:%.*]], i32** [[ST_ADDR]], align 8
// CHECK-NEXT:    store i32* [[LD:%.*]], i32** [[LD_ADDR]], align 8
// CHECK-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[LD_ADDR]], align 8
// CHECK-NEXT:    store i64 3, i64* [[__OFFSET_ADDR_I]], align 8
// CHECK-NEXT:    store i32* [[TMP0]], i32** [[__PTR_ADDR_I]], align 8
// CHECK-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[__PTR_ADDR_I]], align 8
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[TMP1]] to i8*
// CHECK-NEXT:    [[TMP3:%.*]] = load i64, i64* [[__OFFSET_ADDR_I]], align 8
// CHECK-NEXT:    [[ADD_PTR_I:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i64 [[TMP3]]
// CHECK-NEXT:    store i8* [[ADD_PTR_I]], i8** [[__ADDR_I]], align 8
// CHECK-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[__ADDR_I]], align 8
// CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to <4 x i32>*
// CHECK-NEXT:    [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP5]], align 1
// CHECK-NEXT:    [[TMP7:%.*]] = load <4 x i32>*, <4 x i32>** [[C_ADDR]], align 8
// CHECK-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 16
// CHECK-NEXT:    [[TMP8:%.*]] = load <4 x i32>*, <4 x i32>** [[C_ADDR]], align 8
// CHECK-NEXT:    [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[TMP8]], align 16
// CHECK-NEXT:    [[TMP10:%.*]] = load i32*, i32** [[ST_ADDR]], align 8
// CHECK-NEXT:    store <4 x i32> [[TMP9]], <4 x i32>* [[__VEC_ADDR_I]], align 16
// CHECK-NEXT:    store i64 7, i64* [[__OFFSET_ADDR_I1]], align 8
// CHECK-NEXT:    store i32* [[TMP10]], i32** [[__PTR_ADDR_I2]], align 8
// CHECK-NEXT:    [[TMP11:%.*]] = load i32*, i32** [[__PTR_ADDR_I2]], align 8
// CHECK-NEXT:    [[TMP12:%.*]] = bitcast i32* [[TMP11]] to i8*
// CHECK-NEXT:    [[TMP13:%.*]] = load i64, i64* [[__OFFSET_ADDR_I1]], align 8
// CHECK-NEXT:    [[ADD_PTR_I4:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 [[TMP13]]
// CHECK-NEXT:    store i8* [[ADD_PTR_I4]], i8** [[__ADDR_I3]], align 8
// CHECK-NEXT:    [[TMP14:%.*]] = load <4 x i32>, <4 x i32>* [[__VEC_ADDR_I]], align 16
// CHECK-NEXT:    [[TMP15:%.*]] = load i8*, i8** [[__ADDR_I3]], align 8
// CHECK-NEXT:    [[TMP16:%.*]] = bitcast i8* [[TMP15]] to <4 x i32>*
// CHECK-NEXT:    store <4 x i32> [[TMP14]], <4 x i32>* [[TMP16]], align 1
// CHECK-NEXT:    ret void
//
// vec_xl / vec_xst round trip for signed int elements using a long long
// offset. Loads a vector from ld at offset 3 into *c, then stores *c to st at
// offset 7. The expected IR above shows each offset applied as a byte offset
// (i8 getelementptr) and the <4 x i32> load/store emitted with align 1.
void test3(vector signed int *c, signed int *st, const signed int *ld) {
    *c = vec_xl(3ll, ld);
    vec_xst(*c, 7ll, st);
}

// CHECK-LABEL: @test4(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[__VEC_ADDR_I:%.*]] = alloca <4 x i32>, align 16
// CHECK-NEXT:    [[__OFFSET_ADDR_I1:%.*]] = alloca i64, align 8
// CHECK-NEXT:    [[__PTR_ADDR_I2:%.*]] = alloca i32*, align 8
// CHECK-NEXT:    [[__ADDR_I3:%.*]] = alloca i8*, align 8
// CHECK-NEXT:    [[__OFFSET_ADDR_I:%.*]] = alloca i64, align 8
// CHECK-NEXT:    [[__PTR_ADDR_I:%.*]] = alloca i32*, align 8
// CHECK-NEXT:    [[__ADDR_I:%.*]] = alloca i8*, align 8
// CHECK-NEXT:    [[C_ADDR:%.*]] = alloca <4 x i32>*, align 8
// CHECK-NEXT:    [[ST_ADDR:%.*]] = alloca i32*, align 8
// CHECK-NEXT:    [[LD_ADDR:%.*]] = alloca i32*, align 8
// CHECK-NEXT:    store <4 x i32>* [[C:%.*]], <4 x i32>** [[C_ADDR]], align 8
// CHECK-NEXT:    store i32* [[ST:%.*]], i32** [[ST_ADDR]], align 8
// CHECK-NEXT:    store i32* [[LD:%.*]], i32** [[LD_ADDR]], align 8
// CHECK-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[LD_ADDR]], align 8
// CHECK-NEXT:    store i64 3, i64* [[__OFFSET_ADDR_I]], align 8
// CHECK-NEXT:    store i32* [[TMP0]], i32** [[__PTR_ADDR_I]], align 8
// CHECK-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[__PTR_ADDR_I]], align 8
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[TMP1]] to i8*
// CHECK-NEXT:    [[TMP3:%.*]] = load i64, i64* [[__OFFSET_ADDR_I]], align 8
// CHECK-NEXT:    [[ADD_PTR_I:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i64 [[TMP3]]
// CHECK-NEXT:    store i8* [[ADD_PTR_I]], i8** [[__ADDR_I]], align 8
// CHECK-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[__ADDR_I]], align 8
// CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to <4 x i32>*
// CHECK-NEXT:    [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP5]], align 1
// CHECK-NEXT:    [[TMP7:%.*]] = load <4 x i32>*, <4 x i32>** [[C_ADDR]], align 8
// CHECK-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 16
// CHECK-NEXT:    [[TMP8:%.*]] = load <4 x i32>*, <4 x i32>** [[C_ADDR]], align 8
// CHECK-NEXT:    [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[TMP8]], align 16
// CHECK-NEXT:    [[TMP10:%.*]] = load i32*, i32** [[ST_ADDR]], align 8
// CHECK-NEXT:    store <4 x i32> [[TMP9]], <4 x i32>* [[__VEC_ADDR_I]], align 16
// CHECK-NEXT:    store i64 7, i64* [[__OFFSET_ADDR_I1]], align 8
// CHECK-NEXT:    store i32* [[TMP10]], i32** [[__PTR_ADDR_I2]], align 8
// CHECK-NEXT:    [[TMP11:%.*]] = load i32*, i32** [[__PTR_ADDR_I2]], align 8
// CHECK-NEXT:    [[TMP12:%.*]] = bitcast i32* [[TMP11]] to i8*
// CHECK-NEXT:    [[TMP13:%.*]] = load i64, i64* [[__OFFSET_ADDR_I1]], align 8
// CHECK-NEXT:    [[ADD_PTR_I4:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 [[TMP13]]
// CHECK-NEXT:    store i8* [[ADD_PTR_I4]], i8** [[__ADDR_I3]], align 8
// CHECK-NEXT:    [[TMP14:%.*]] = load <4 x i32>, <4 x i32>* [[__VEC_ADDR_I]], align 16
// CHECK-NEXT:    [[TMP15:%.*]] = load i8*, i8** [[__ADDR_I3]], align 8
// CHECK-NEXT:    [[TMP16:%.*]] = bitcast i8* [[TMP15]] to <4 x i32>*
// CHECK-NEXT:    store <4 x i32> [[TMP14]], <4 x i32>* [[TMP16]], align 1
// CHECK-NEXT:    ret void
//
// vec_xl / vec_xst round trip for unsigned int elements using a long long
// offset. Loads a vector from ld at offset 3 into *c, then stores *c to st at
// offset 7. The expected IR above is the same <4 x i32> sequence as the
// signed int case: byte-offset getelementptr, vector access with align 1.
void test4(vector unsigned int *c, unsigned int *st, const unsigned int *ld) {
    *c = vec_xl(3ll, ld);
    vec_xst(*c, 7ll, st);
}

// CHECK-LABEL: @test5(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[__VEC_ADDR_I:%.*]] = alloca <2 x i64>, align 16
// CHECK-NEXT:    [[__OFFSET_ADDR_I1:%.*]] = alloca i64, align 8
// CHECK-NEXT:    [[__PTR_ADDR_I2:%.*]] = alloca i64*, align 8
// CHECK-NEXT:    [[__ADDR_I3:%.*]] = alloca i8*, align 8
// CHECK-NEXT:    [[__OFFSET_ADDR_I:%.*]] = alloca i64, align 8
// CHECK-NEXT:    [[__PTR_ADDR_I:%.*]] = alloca i64*, align 8
// CHECK-NEXT:    [[__ADDR_I:%.*]] = alloca i8*, align 8
// CHECK-NEXT:    [[C_ADDR:%.*]] = alloca <2 x i64>*, align 8
// CHECK-NEXT:    [[ST_ADDR:%.*]] = alloca i64*, align 8
// CHECK-NEXT:    [[LD_ADDR:%.*]] = alloca i64*, align 8
// CHECK-NEXT:    store <2 x i64>* [[C:%.*]], <2 x i64>** [[C_ADDR]], align 8
// CHECK-NEXT:    store i64* [[ST:%.*]], i64** [[ST_ADDR]], align 8
// CHECK-NEXT:    store i64* [[LD:%.*]], i64** [[LD_ADDR]], align 8
// CHECK-NEXT:    [[TMP0:%.*]] = load i64*, i64** [[LD_ADDR]], align 8
// CHECK-NEXT:    store i64 3, i64* [[__OFFSET_ADDR_I]], align 8
// CHECK-NEXT:    store i64* [[TMP0]], i64** [[__PTR_ADDR_I]], align 8
// CHECK-NEXT:    [[TMP1:%.*]] = load i64*, i64** [[__PTR_ADDR_I]], align 8
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64* [[TMP1]] to i8*
// CHECK-NEXT:    [[TMP3:%.*]] = load i64, i64* [[__OFFSET_ADDR_I]], align 8
// CHECK-NEXT:    [[ADD_PTR_I:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i64 [[TMP3]]
// CHECK-NEXT:    store i8* [[ADD_PTR_I]], i8** [[__ADDR_I]], align 8
// CHECK-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[__ADDR_I]], align 8
// CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to <2 x i64>*
// CHECK-NEXT:    [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[TMP5]], align 1
// CHECK-NEXT:    [[TMP7:%.*]] = load <2 x i64>*, <2 x i64>** [[C_ADDR]], align 8
// CHECK-NEXT:    store <2 x i64> [[TMP6]], <2 x i64>* [[TMP7]], align 16
// CHECK-NEXT:    [[TMP8:%.*]] = load <2 x i64>*, <2 x i64>** [[C_ADDR]], align 8
// CHECK-NEXT:    [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[TMP8]], align 16
// CHECK-NEXT:    [[TMP10:%.*]] = load i64*, i64** [[ST_ADDR]], align 8
// CHECK-NEXT:    store <2 x i64> [[TMP9]], <2 x i64>* [[__VEC_ADDR_I]], align 16
// CHECK-NEXT:    store i64 7, i64* [[__OFFSET_ADDR_I1]], align 8
// CHECK-NEXT:    store i64* [[TMP10]], i64** [[__PTR_ADDR_I2]], align 8
// CHECK-NEXT:    [[TMP11:%.*]] = load i64*, i64** [[__PTR_ADDR_I2]], align 8
// CHECK-NEXT:    [[TMP12:%.*]] = bitcast i64* [[TMP11]] to i8*
// CHECK-NEXT:    [[TMP13:%.*]] = load i64, i64* [[__OFFSET_ADDR_I1]], align 8
// CHECK-NEXT:    [[ADD_PTR_I4:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 [[TMP13]]
// CHECK-NEXT:    store i8* [[ADD_PTR_I4]], i8** [[__ADDR_I3]], align 8
// CHECK-NEXT:    [[TMP14:%.*]] = load <2 x i64>, <2 x i64>* [[__VEC_ADDR_I]], align 16
// CHECK-NEXT:    [[TMP15:%.*]] = load i8*, i8** [[__ADDR_I3]], align 8
// CHECK-NEXT:    [[TMP16:%.*]] = bitcast i8* [[TMP15]] to <2 x i64>*
// CHECK-NEXT:    store <2 x i64> [[TMP14]], <2 x i64>* [[TMP16]], align 1
// CHECK-NEXT:    ret void
//
// vec_xl / vec_xst round trip for signed long long elements using a long long
// offset. Loads a vector from ld at offset 3 into *c, then stores *c to st at
// offset 7. The expected IR above shows each offset applied as a byte offset
// (i8 getelementptr) and the <2 x i64> load/store emitted with align 1.
void test5(vector signed long long *c, signed long long *st,
           const signed long long *ld) {
    *c = vec_xl(3ll, ld);
    vec_xst(*c, 7ll, st);
}

// CHECK-LABEL: @test6(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[__VEC_ADDR_I:%.*]] = alloca <2 x i64>, align 16
// CHECK-NEXT:    [[__OFFSET_ADDR_I1:%.*]] = alloca i64, align 8
// CHECK-NEXT:    [[__PTR_ADDR_I2:%.*]] = alloca i64*, align 8
// CHECK-NEXT:    [[__ADDR_I3:%.*]] = alloca i8*, align 8
// CHECK-NEXT:    [[__OFFSET_ADDR_I:%.*]] = alloca i64, align 8
// CHECK-NEXT:    [[__PTR_ADDR_I:%.*]] = alloca i64*, align 8
// CHECK-NEXT:    [[__ADDR_I:%.*]] = alloca i8*, align 8
// CHECK-NEXT:    [[C_ADDR:%.*]] = alloca <2 x i64>*, align 8
// CHECK-NEXT:    [[ST_ADDR:%.*]] = alloca i64*, align 8
// CHECK-NEXT:    [[LD_ADDR:%.*]] = alloca i64*, align 8
// CHECK-NEXT:    store <2 x i64>* [[C:%.*]], <2 x i64>** [[C_ADDR]], align 8
// CHECK-NEXT:    store i64* [[ST:%.*]], i64** [[ST_ADDR]], align 8
// CHECK-NEXT:    store i64* [[LD:%.*]], i64** [[LD_ADDR]], align 8
// CHECK-NEXT:    [[TMP0:%.*]] = load i64*, i64** [[LD_ADDR]], align 8
// CHECK-NEXT:    store i64 3, i64* [[__OFFSET_ADDR_I]], align 8
// CHECK-NEXT:    store i64* [[TMP0]], i64** [[__PTR_ADDR_I]], align 8
// CHECK-NEXT:    [[TMP1:%.*]] = load i64*, i64** [[__PTR_ADDR_I]], align 8
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64* [[TMP1]] to i8*
// CHECK-NEXT:    [[TMP3:%.*]] = load i64, i64* [[__OFFSET_ADDR_I]], align 8
// CHECK-NEXT:    [[ADD_PTR_I:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i64 [[TMP3]]
// CHECK-NEXT:    store i8* [[ADD_PTR_I]], i8** [[__ADDR_I]], align 8
// CHECK-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[__ADDR_I]], align 8
// CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to <2 x i64>*
// CHECK-NEXT:    [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[TMP5]], align 1
// CHECK-NEXT:    [[TMP7:%.*]] = load <2 x i64>*, <2 x i64>** [[C_ADDR]], align 8
// CHECK-NEXT:    store <2 x i64> [[TMP6]], <2 x i64>* [[TMP7]], align 16
// CHECK-NEXT:    [[TMP8:%.*]] = load <2 x i64>*, <2 x i64>** [[C_ADDR]], align 8
// CHECK-NEXT:    [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[TMP8]], align 16
// CHECK-NEXT:    [[TMP10:%.*]] = load i64*, i64** [[ST_ADDR]], align 8
// CHECK-NEXT:    store <2 x i64> [[TMP9]], <2 x i64>* [[__VEC_ADDR_I]], align 16
// CHECK-NEXT:    store i64 7, i64* [[__OFFSET_ADDR_I1]], align 8
// CHECK-NEXT:    store i64* [[TMP10]], i64** [[__PTR_ADDR_I2]], align 8
// CHECK-NEXT:    [[TMP11:%.*]] = load i64*, i64** [[__PTR_ADDR_I2]], align 8
// CHECK-NEXT:    [[TMP12:%.*]] = bitcast i64* [[TMP11]] to i8*
// CHECK-NEXT:    [[TMP13:%.*]] = load i64, i64* [[__OFFSET_ADDR_I1]], align 8
// CHECK-NEXT:    [[ADD_PTR_I4:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 [[TMP13]]
// CHECK-NEXT:    store i8* [[ADD_PTR_I4]], i8** [[__ADDR_I3]], align 8
// CHECK-NEXT:    [[TMP14:%.*]] = load <2 x i64>, <2 x i64>* [[__VEC_ADDR_I]], align 16
// CHECK-NEXT:    [[TMP15:%.*]] = load i8*, i8** [[__ADDR_I3]], align 8
// CHECK-NEXT:    [[TMP16:%.*]] = bitcast i8* [[TMP15]] to <2 x i64>*
// CHECK-NEXT:    store <2 x i64> [[TMP14]], <2 x i64>* [[TMP16]], align 1
// CHECK-NEXT:    ret void
//
// vec_xl / vec_xst round trip for unsigned long long elements using a long
// long offset. Loads a vector from ld at offset 3 into *c, then stores *c to
// st at offset 7. The expected IR above is the same <2 x i64> sequence as the
// signed long long case: byte-offset getelementptr, vector access with
// align 1.
void test6(vector unsigned long long *c, unsigned long long *st,
           const unsigned long long *ld) {
    *c = vec_xl(3ll, ld);
    vec_xst(*c, 7ll, st);
}

// CHECK-LABEL: @test7(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[__VEC_ADDR_I:%.*]] = alloca <4 x float>, align 16
// CHECK-NEXT:    [[__OFFSET_ADDR_I1:%.*]] = alloca i64, align 8
// CHECK-NEXT:    [[__PTR_ADDR_I2:%.*]] = alloca float*, align 8
// CHECK-NEXT:    [[__ADDR_I3:%.*]] = alloca i8*, align 8
// CHECK-NEXT:    [[__OFFSET_ADDR_I:%.*]] = alloca i64, align 8
// CHECK-NEXT:    [[__PTR_ADDR_I:%.*]] = alloca float*, align 8
// CHECK-NEXT:    [[__ADDR_I:%.*]] = alloca i8*, align 8
// CHECK-NEXT:    [[C_ADDR:%.*]] = alloca <4 x float>*, align 8
// CHECK-NEXT:    [[ST_ADDR:%.*]] = alloca float*, align 8
// CHECK-NEXT:    [[LD_ADDR:%.*]] = alloca float*, align 8
// CHECK-NEXT:    store <4 x float>* [[C:%.*]], <4 x float>** [[C_ADDR]], align 8
// CHECK-NEXT:    store float* [[ST:%.*]], float** [[ST_ADDR]], align 8
// CHECK-NEXT:    store float* [[LD:%.*]], float** [[LD_ADDR]], align 8
// CHECK-NEXT:    [[TMP0:%.*]] = load float*, float** [[LD_ADDR]], align 8
// CHECK-NEXT:    store i64 3, i64* [[__OFFSET_ADDR_I]], align 8
// CHECK-NEXT:    store float* [[TMP0]], float** [[__PTR_ADDR_I]], align 8
// CHECK-NEXT:    [[TMP1:%.*]] = load float*, float** [[__PTR_ADDR_I]], align 8
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[TMP1]] to i8*
// CHECK-NEXT:    [[TMP3:%.*]] = load i64, i64* [[__OFFSET_ADDR_I]], align 8
// CHECK-NEXT:    [[ADD_PTR_I:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i64 [[TMP3]]
// CHECK-NEXT:    store i8* [[ADD_PTR_I]], i8** [[__ADDR_I]], align 8
// CHECK-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[__ADDR_I]], align 8
// CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to <4 x float>*
// CHECK-NEXT:    [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[TMP5]], align 1
// CHECK-NEXT:    [[TMP7:%.*]] = load <4 x float>*, <4 x float>** [[C_ADDR]], align 8
// CHECK-NEXT:    store <4 x float> [[TMP6]], <4 x float>* [[TMP7]], align 16
// CHECK-NEXT:    [[TMP8:%.*]] = load <4 x float>*, <4 x float>** [[C_ADDR]], align 8
// CHECK-NEXT:    [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[TMP8]], align 16
// CHECK-NEXT:    [[TMP10:%.*]] = load float*, float** [[ST_ADDR]], align 8
// CHECK-NEXT:    store <4 x float> [[TMP9]], <4 x float>* [[__VEC_ADDR_I]], align 16
// CHECK-NEXT:    store i64 7, i64* [[__OFFSET_ADDR_I1]], align 8
// CHECK-NEXT:    store float* [[TMP10]], float** [[__PTR_ADDR_I2]], align 8
// CHECK-NEXT:    [[TMP11:%.*]] = load float*, float** [[__PTR_ADDR_I2]], align 8
// CHECK-NEXT:    [[TMP12:%.*]] = bitcast float* [[TMP11]] to i8*
// CHECK-NEXT:    [[TMP13:%.*]] = load i64, i64* [[__OFFSET_ADDR_I1]], align 8
// CHECK-NEXT:    [[ADD_PTR_I4:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 [[TMP13]]
// CHECK-NEXT:    store i8* [[ADD_PTR_I4]], i8** [[__ADDR_I3]], align 8
// CHECK-NEXT:    [[TMP14:%.*]] = load <4 x float>, <4 x float>* [[__VEC_ADDR_I]], align 16
// CHECK-NEXT:    [[TMP15:%.*]] = load i8*, i8** [[__ADDR_I3]], align 8
// CHECK-NEXT:    [[TMP16:%.*]] = bitcast i8* [[TMP15]] to <4 x float>*
// CHECK-NEXT:    store <4 x float> [[TMP14]], <4 x float>* [[TMP16]], align 1
// CHECK-NEXT:    ret void
//
// vec_xl / vec_xst round trip for float elements using a long long offset.
// Loads a vector from ld at offset 3 into *c, then stores *c to st at
// offset 7. The expected IR above shows each offset applied as a byte offset
// (i8 getelementptr) and the <4 x float> load/store emitted with align 1.
void test7(vector float *c, float *st, const float *ld) {
    *c = vec_xl(3ll, ld);
    vec_xst(*c, 7ll, st);
}

// CHECK-LABEL: @test8(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[__VEC_ADDR_I:%.*]] = alloca <2 x double>, align 16
// CHECK-NEXT:    [[__OFFSET_ADDR_I1:%.*]] = alloca i64, align 8
// CHECK-NEXT:    [[__PTR_ADDR_I2:%.*]] = alloca double*, align 8
// CHECK-NEXT:    [[__ADDR_I3:%.*]] = alloca i8*, align 8
// CHECK-NEXT:    [[__OFFSET_ADDR_I:%.*]] = alloca i64, align 8
// CHECK-NEXT:    [[__PTR_ADDR_I:%.*]] = alloca double*, align 8
// CHECK-NEXT:    [[__ADDR_I:%.*]] = alloca i8*, align 8
// CHECK-NEXT:    [[C_ADDR:%.*]] = alloca <2 x double>*, align 8
// CHECK-NEXT:    [[ST_ADDR:%.*]] = alloca double*, align 8
// CHECK-NEXT:    [[LD_ADDR:%.*]] = alloca double*, align 8
// CHECK-NEXT:    store <2 x double>* [[C:%.*]], <2 x double>** [[C_ADDR]], align 8
// CHECK-NEXT:    store double* [[ST:%.*]], double** [[ST_ADDR]], align 8
// CHECK-NEXT:    store double* [[LD:%.*]], double** [[LD_ADDR]], align 8
// CHECK-NEXT:    [[TMP0:%.*]] = load double*, double** [[LD_ADDR]], align 8
// CHECK-NEXT:    store i64 3, i64* [[__OFFSET_ADDR_I]], align 8
// CHECK-NEXT:    store double* [[TMP0]], double** [[__PTR_ADDR_I]], align 8
// CHECK-NEXT:    [[TMP1:%.*]] = load double*, double** [[__PTR_ADDR_I]], align 8
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast double* [[TMP1]] to i8*
// CHECK-NEXT:    [[TMP3:%.*]] = load i64, i64* [[__OFFSET_ADDR_I]], align 8
// CHECK-NEXT:    [[ADD_PTR_I:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i64 [[TMP3]]
// CHECK-NEXT:    store i8* [[ADD_PTR_I]], i8** [[__ADDR_I]], align 8
// CHECK-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[__ADDR_I]], align 8
// CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to <2 x double>*
// CHECK-NEXT:    [[TMP6:%.*]] = load <2 x double>, <2 x double>* [[TMP5]], align 1
// CHECK-NEXT:    [[TMP7:%.*]] = load <2 x double>*, <2 x double>** [[C_ADDR]], align 8
// CHECK-NEXT:    store <2 x double> [[TMP6]], <2 x double>* [[TMP7]], align 16
// CHECK-NEXT:    [[TMP8:%.*]] = load <2 x double>*, <2 x double>** [[C_ADDR]], align 8
// CHECK-NEXT:    [[TMP9:%.*]] = load <2 x double>, <2 x double>* [[TMP8]], align 16
// CHECK-NEXT:    [[TMP10:%.*]] = load double*, double** [[ST_ADDR]], align 8
// CHECK-NEXT:    store <2 x double> [[TMP9]], <2 x double>* [[__VEC_ADDR_I]], align 16
// CHECK-NEXT:    store i64 7, i64* [[__OFFSET_ADDR_I1]], align 8
// CHECK-NEXT:    store double* [[TMP10]], double** [[__PTR_ADDR_I2]], align 8
// CHECK-NEXT:    [[TMP11:%.*]] = load double*, double** [[__PTR_ADDR_I2]], align 8
// CHECK-NEXT:    [[TMP12:%.*]] = bitcast double* [[TMP11]] to i8*
// CHECK-NEXT:    [[TMP13:%.*]] = load i64, i64* [[__OFFSET_ADDR_I1]], align 8
// CHECK-NEXT:    [[ADD_PTR_I4:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 [[TMP13]]
// CHECK-NEXT:    store i8* [[ADD_PTR_I4]], i8** [[__ADDR_I3]], align 8
// CHECK-NEXT:    [[TMP14:%.*]] = load <2 x double>, <2 x double>* [[__VEC_ADDR_I]], align 16
// CHECK-NEXT:    [[TMP15:%.*]] = load i8*, i8** [[__ADDR_I3]], align 8
// CHECK-NEXT:    [[TMP16:%.*]] = bitcast i8* [[TMP15]] to <2 x double>*
// CHECK-NEXT:    store <2 x double> [[TMP14]], <2 x double>* [[TMP16]], align 1
// CHECK-NEXT:    ret void
//
// vec_xl / vec_xst round trip for double elements using a long long offset.
// Loads a vector from ld at offset 3 into *c, then stores *c to st at
// offset 7. The expected IR above shows each offset applied as a byte offset
// (i8 getelementptr) and the <2 x double> load/store emitted with align 1.
void test8(vector double *c, double *st, const double *ld) {
    *c = vec_xl(3ll, ld);
    vec_xst(*c, 7ll, st);
}

#ifdef __POWER8_VECTOR__
// CHECK-P8-LABEL: @test9(
// CHECK-P8-NEXT:  entry:
// CHECK-P8-NEXT:    [[__VEC_ADDR_I:%.*]] = alloca <1 x i128>, align 16
// CHECK-P8-NEXT:    [[__OFFSET_ADDR_I1:%.*]] = alloca i64, align 8
// CHECK-P8-NEXT:    [[__PTR_ADDR_I2:%.*]] = alloca i128*, align 8
// CHECK-P8-NEXT:    [[__ADDR_I3:%.*]] = alloca i8*, align 8
// CHECK-P8-NEXT:    [[__OFFSET_ADDR_I:%.*]] = alloca i64, align 8
// CHECK-P8-NEXT:    [[__PTR_ADDR_I:%.*]] = alloca i128*, align 8
// CHECK-P8-NEXT:    [[__ADDR_I:%.*]] = alloca i8*, align 8
// CHECK-P8-NEXT:    [[C_ADDR:%.*]] = alloca <1 x i128>*, align 8
// CHECK-P8-NEXT:    [[ST_ADDR:%.*]] = alloca i128*, align 8
// CHECK-P8-NEXT:    [[LD_ADDR:%.*]] = alloca i128*, align 8
// CHECK-P8-NEXT:    store <1 x i128>* [[C:%.*]], <1 x i128>** [[C_ADDR]], align 8
// CHECK-P8-NEXT:    store i128* [[ST:%.*]], i128** [[ST_ADDR]], align 8
// CHECK-P8-NEXT:    store i128* [[LD:%.*]], i128** [[LD_ADDR]], align 8
// CHECK-P8-NEXT:    [[TMP0:%.*]] = load i128*, i128** [[LD_ADDR]], align 8
// CHECK-P8-NEXT:    store i64 3, i64* [[__OFFSET_ADDR_I]], align 8
// CHECK-P8-NEXT:    store i128* [[TMP0]], i128** [[__PTR_ADDR_I]], align 8
// CHECK-P8-NEXT:    [[TMP1:%.*]] = load i128*, i128** [[__PTR_ADDR_I]], align 8
// CHECK-P8-NEXT:    [[TMP2:%.*]] = bitcast i128* [[TMP1]] to i8*
// CHECK-P8-NEXT:    [[TMP3:%.*]] = load i64, i64* [[__OFFSET_ADDR_I]], align 8
// CHECK-P8-NEXT:    [[ADD_PTR_I:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i64 [[TMP3]]
// CHECK-P8-NEXT:    store i8* [[ADD_PTR_I]], i8** [[__ADDR_I]], align 8
// CHECK-P8-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[__ADDR_I]], align 8
// CHECK-P8-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to <1 x i128>*
// CHECK-P8-NEXT:    [[TMP6:%.*]] = load <1 x i128>, <1 x i128>* [[TMP5]], align 1
// CHECK-P8-NEXT:    [[TMP7:%.*]] = load <1 x i128>*, <1 x i128>** [[C_ADDR]], align 8
// CHECK-P8-NEXT:    store <1 x i128> [[TMP6]], <1 x i128>* [[TMP7]], align 16
// CHECK-P8-NEXT:    [[TMP8:%.*]] = load <1 x i128>*, <1 x i128>** [[C_ADDR]], align 8
// CHECK-P8-NEXT:    [[TMP9:%.*]] = load <1 x i128>, <1 x i128>* [[TMP8]], align 16
// CHECK-P8-NEXT:    [[TMP10:%.*]] = load i128*, i128** [[ST_ADDR]], align 8
// CHECK-P8-NEXT:    store <1 x i128> [[TMP9]], <1 x i128>* [[__VEC_ADDR_I]], align 16
// CHECK-P8-NEXT:    store i64 7, i64* [[__OFFSET_ADDR_I1]], align 8
// CHECK-P8-NEXT:    store i128* [[TMP10]], i128** [[__PTR_ADDR_I2]], align 8
// CHECK-P8-NEXT:    [[TMP11:%.*]] = load i128*, i128** [[__PTR_ADDR_I2]], align 8
// CHECK-P8-NEXT:    [[TMP12:%.*]] = bitcast i128* [[TMP11]] to i8*
// CHECK-P8-NEXT:    [[TMP13:%.*]] = load i64, i64* [[__OFFSET_ADDR_I1]], align 8
// CHECK-P8-NEXT:    [[ADD_PTR_I4:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 [[TMP13]]
// CHECK-P8-NEXT:    store i8* [[ADD_PTR_I4]], i8** [[__ADDR_I3]], align 8
// CHECK-P8-NEXT:    [[TMP14:%.*]] = load <1 x i128>, <1 x i128>* [[__VEC_ADDR_I]], align 16
// CHECK-P8-NEXT:    [[TMP15:%.*]] = load i8*, i8** [[__ADDR_I3]], align 8
// CHECK-P8-NEXT:    [[TMP16:%.*]] = bitcast i8* [[TMP15]] to <1 x i128>*
// CHECK-P8-NEXT:    store <1 x i128> [[TMP14]], <1 x i128>* [[TMP16]], align 1
// CHECK-P8-NEXT:    ret void
//
// vec_xl / vec_xst round trip for signed __int128 elements using a long long
// offset. Compiled only when __POWER8_VECTOR__ is defined (enclosing #ifdef),
// so its expected IR is verified only by the run that enables power8-vector.
// Loads a vector from ld at offset 3 into *c, then stores *c to st at
// offset 7; the expected IR above uses a byte-offset getelementptr and a
// <1 x i128> load/store with align 1.
void test9(vector signed __int128 *c, signed __int128 *st,
           const signed __int128 *ld) {
    *c = vec_xl(3ll, ld);
    vec_xst(*c, 7ll, st);
}

// CHECK-P8-LABEL: @test10(
// CHECK-P8-NEXT:  entry:
// CHECK-P8-NEXT:    [[__VEC_ADDR_I:%.*]] = alloca <1 x i128>, align 16
// CHECK-P8-NEXT:    [[__OFFSET_ADDR_I1:%.*]] = alloca i64, align 8
// CHECK-P8-NEXT:    [[__PTR_ADDR_I2:%.*]] = alloca i128*, align 8
// CHECK-P8-NEXT:    [[__ADDR_I3:%.*]] = alloca i8*, align 8
// CHECK-P8-NEXT:    [[__OFFSET_ADDR_I:%.*]] = alloca i64, align 8
// CHECK-P8-NEXT:    [[__PTR_ADDR_I:%.*]] = alloca i128*, align 8
// CHECK-P8-NEXT:    [[__ADDR_I:%.*]] = alloca i8*, align 8
// CHECK-P8-NEXT:    [[C_ADDR:%.*]] = alloca <1 x i128>*, align 8
// CHECK-P8-NEXT:    [[ST_ADDR:%.*]] = alloca i128*, align 8
// CHECK-P8-NEXT:    [[LD_ADDR:%.*]] = alloca i128*, align 8
// CHECK-P8-NEXT:    store <1 x i128>* [[C:%.*]], <1 x i128>** [[C_ADDR]], align 8
// CHECK-P8-NEXT:    store i128* [[ST:%.*]], i128** [[ST_ADDR]], align 8
// CHECK-P8-NEXT:    store i128* [[LD:%.*]], i128** [[LD_ADDR]], align 8
// CHECK-P8-NEXT:    [[TMP0:%.*]] = load i128*, i128** [[LD_ADDR]], align 8
// CHECK-P8-NEXT:    store i64 3, i64* [[__OFFSET_ADDR_I]], align 8
// CHECK-P8-NEXT:    store i128* [[TMP0]], i128** [[__PTR_ADDR_I]], align 8
// CHECK-P8-NEXT:    [[TMP1:%.*]] = load i128*, i128** [[__PTR_ADDR_I]], align 8
// CHECK-P8-NEXT:    [[TMP2:%.*]] = bitcast i128* [[TMP1]] to i8*
// CHECK-P8-NEXT:    [[TMP3:%.*]] = load i64, i64* [[__OFFSET_ADDR_I]], align 8
// CHECK-P8-NEXT:    [[ADD_PTR_I:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i64 [[TMP3]]
// CHECK-P8-NEXT:    store i8* [[ADD_PTR_I]], i8** [[__ADDR_I]], align 8
// CHECK-P8-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[__ADDR_I]], align 8
// CHECK-P8-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to <1 x i128>*
// CHECK-P8-NEXT:    [[TMP6:%.*]] = load <1 x i128>, <1 x i128>* [[TMP5]], align 1
// CHECK-P8-NEXT:    [[TMP7:%.*]] = load <1 x i128>*, <1 x i128>** [[C_ADDR]], align 8
// CHECK-P8-NEXT:    store <1 x i128> [[TMP6]], <1 x i128>* [[TMP7]], align 16
// CHECK-P8-NEXT:    [[TMP8:%.*]] = load <1 x i128>*, <1 x i128>** [[C_ADDR]], align 8
// CHECK-P8-NEXT:    [[TMP9:%.*]] = load <1 x i128>, <1 x i128>* [[TMP8]], align 16
// CHECK-P8-NEXT:    [[TMP10:%.*]] = load i128*, i128** [[ST_ADDR]], align 8
// CHECK-P8-NEXT:    store <1 x i128> [[TMP9]], <1 x i128>* [[__VEC_ADDR_I]], align 16
// CHECK-P8-NEXT:    store i64 7, i64* [[__OFFSET_ADDR_I1]], align 8
// CHECK-P8-NEXT:    store i128* [[TMP10]], i128** [[__PTR_ADDR_I2]], align 8
// CHECK-P8-NEXT:    [[TMP11:%.*]] = load i128*, i128** [[__PTR_ADDR_I2]], align 8
// CHECK-P8-NEXT:    [[TMP12:%.*]] = bitcast i128* [[TMP11]] to i8*
// CHECK-P8-NEXT:    [[TMP13:%.*]] = load i64, i64* [[__OFFSET_ADDR_I1]], align 8
// CHECK-P8-NEXT:    [[ADD_PTR_I4:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 [[TMP13]]
// CHECK-P8-NEXT:    store i8* [[ADD_PTR_I4]], i8** [[__ADDR_I3]], align 8
// CHECK-P8-NEXT:    [[TMP14:%.*]] = load <1 x i128>, <1 x i128>* [[__VEC_ADDR_I]], align 16
// CHECK-P8-NEXT:    [[TMP15:%.*]] = load i8*, i8** [[__ADDR_I3]], align 8
// CHECK-P8-NEXT:    [[TMP16:%.*]] = bitcast i8* [[TMP15]] to <1 x i128>*
// CHECK-P8-NEXT:    store <1 x i128> [[TMP14]], <1 x i128>* [[TMP16]], align 1
// CHECK-P8-NEXT:    ret void
//
// vec_xl / vec_xst round trip for unsigned __int128 elements using a long
// long offset. Compiled only when __POWER8_VECTOR__ is defined (enclosing
// #ifdef), so its expected IR is verified only by the run that enables
// power8-vector. Loads a vector from ld at offset 3 into *c, then stores *c
// to st at offset 7; the expected IR above uses a byte-offset getelementptr
// and a <1 x i128> load/store with align 1.
void test10(vector unsigned __int128 *c, unsigned __int128 *st,
            const unsigned __int128 *ld) {
    *c = vec_xl(3ll, ld);
    vec_xst(*c, 7ll, st);
}
#endif
